Data analysis

Set up and read in data

library(DT)
library(rcartocolor)
library(rnaturalearth)
library(gtsummary)
library(ltm)
library(tidyverse)
library(GGally)
library(broom)
library(ggplot2)
theme_set(theme_bw())

The questions in our survey are as follows:

source("utils.R")
valid_data <- read_csv("data/processed_data.csv")
questions <- read_csv("data/questions.csv")

datatable(
  questions,
  options = list(pageLength = 100, scrollY = "400px"),
  rownames = FALSE
)

Demographic characteristics

The demographic characteristics of the participants including gender, age, and country of residence are summarized in Table 1.

demo_data <- valid_data |>
  select(Gender, Age, Country, `Financial stability`)

table1 <- demo_data |>
  # lumping countries with fewer than 4 responses as "Other"
  mutate(Country = fct_lump_min(Country, 5)) |>
  tbl_summary() |>
  modify_caption("**Table 1. Respondent Characteristics**")

table1
Table 1. Respondent Characteristics
Characteristic N = 1351
Gender
    Female 97 (72%)
    Male 34 (25%)
    Prefer not to say 4 (3.0%)
Age 25 (21, 30)
Country
    Belgium 27 (20%)
    China 8 (6.1%)
    Ireland 7 (5.3%)
    United States of America 5 (3.8%)
    Vietnam 47 (36%)
    Other 38 (29%)
    Unknown 3
Financial stability 6.00 (5.00, 7.00)
1 n (%); Median (IQR)
# use map data from rnaturalearth to draw map
my_world <- rnaturalearth::ne_countries(scale = "medium", returnclass = "sf") |>
  select(-geometry) |>
  rename("Country" = name) |>
  full_join(count(demo_data, Country), by = "Country")

continent_data <- my_world |>
  select(Country, continent) |>
  as_tibble() |>
  select(-geometry) |>
  mutate(continent = fct_collapse(continent, "America" = c("North America", "South America")))

Country

my_world |>
  ggplot() +
  geom_sf(aes(fill = n)) +
  coord_sf(crs = "+proj=eqearth +wktext") +
  scale_fill_carto_c(palette = "Sunset") +
  theme(
    panel.background = element_rect(fill = "azure"),
    panel.grid = element_line(color = "#ebebeb"),
    legend.position = c(0.94, 0.75),
    legend.background = element_blank(),
    panel.border = element_rect(fill = NA),
  )

Age and gender

age_breaks <- c(-4, 0, 4, 8)
demo_data |>
  filter(Gender %in% c("Male", "Female")) |>
  count(Age, Gender) |>
  mutate(n = case_when(Gender == "Male" ~ -n, .default = n)) |>
  ggplot() +
  aes(x = Age, y = n, fill = Gender) +
  geom_col() +
  scale_fill_carto_d(palette = "ArmyRose", direction = -1) +
  scale_y_continuous(breaks = age_breaks, labels = abs(age_breaks)) +
  labs(y = NULL, subtitle = "Response counts by gender and age") +
  guides(fill = guide_legend(position = "inside")) +
  theme(legend.position.inside = c(0.8, 0.8))

The majority of our respondents are female and in their 20s.

Financial stability

demo_data |>
  drop_na(`Financial stability`) |>
  ggplot() +
  aes(
    x = `Financial stability`,
    fill = `Financial stability`,
    group = `Financial stability`
  ) +
  geom_bar() +
  scale_fill_carto_c(guide = "none", palette = "Teal") +
  scale_x_continuous(breaks = seq(2, 10, 2)) +
  labs(y = NULL, subtitle = "Financial stability distribution")

Analyses

  1. Cronbach Alpha test for Fiber Awareness (Q1) and Fashion Involvement (Q6).
  2. Descriptive analyses of Purchase Intention (Q2) and Opinion of the fibers (Q4).
  3. Descriptive analyses of the Fiber Knowledge (Q5), Perceived Sustainability (Q3) and Fashion Involvement (Q6).

Responses overview

Internal consistency reliability

How consistent are the responses to the items in the Q1 and Q6 construct?

q1 <- valid_data |>
  select(starts_with("q1_"))
al1 <- cronbach.alpha(q1, CI = TRUE, standardized = TRUE)
al1
## 
## Standardized Cronbach's alpha for the 'q1' data-set
## 
## Items: 4
## Sample units: 135
## alpha: 0.925
## 
## Bootstrap 95% CI based on 1000 samples
##  2.5% 97.5% 
## 0.896 0.945

A Cronbach’s alpha of 0.925 (95% CI: 0.896, 0.945) indicates a very high level of internal consistency among the items in the scale, suggesting that they are all reliably measuring the same underlying construct of fiber awareness before purchase.

q6 <- valid_data |>
  select(starts_with("q6_"))
al6 <- cronbach.alpha(q6, CI = TRUE, standardized = TRUE)
al6
## 
## Standardized Cronbach's alpha for the 'q6' data-set
## 
## Items: 5
## Sample units: 135
## alpha: 0.845
## 
## Bootstrap 95% CI based on 1000 samples
##  2.5% 97.5% 
## 0.794 0.881

A Cronbach’s alpha of 0.845 (95% CI: 0.794, 0.881) indicates a high level of internal consistency among the items in the scale, suggesting that they are all reliably measuring the same underlying construct of fashion involvement.

Distribution of Variables

Opinions and likelihood of a purchase regarding fiber types

q2 <- valid_data |>
  select(starts_with("q2")) |>
  rename_with(~ gsub("q2_sweater_", "", .x)) |>
  mutate(id = row_number()) |>
  pivot_longer(-id) |>
  mutate(name = fct_relevel(name, tolower(sorted_fibers)))

q2 |>
  ggplot() +
  aes(y = value, x = name, fill = name, color = name) +
  geom_violin(alpha = 0.2, adjust = 1.5) +
  geom_jitter(alpha = 0.4, width = 0.2, height = 0.2) +
  scale_fill_carto_d(guide = "none") +
  scale_color_carto_d(guide = "none") +
  scale_y_continuous(breaks = 1:7, labels = sorted_likeliness) +
  labs(
    y = NULL, x = NULL,
    subtitle = "How likely is it that you will buy a sweater made of ..."
  )

Observations: …

Views on environmental impact

q3 <- valid_data |>
  select(starts_with("q3")) |>
  rename_with(~ gsub("q3_impact_", "", .x)) |>
  mutate(id = row_number()) |>
  pivot_longer(-id) |>
  mutate(name = fct_relevel(name, sorted_fibers))

q3 |>
  ggplot() +
  aes(y = value, x = name, fill = name, color = name) +
  geom_violin(alpha = 0.2, adjust = 2) +
  geom_jitter(alpha = 0.4, width = 0.2, height = 0.2) +
  scale_fill_carto_d(guide = "none") +
  scale_color_carto_d(guide = "none") +
  scale_y_continuous(
    breaks = 1:7,
    labels = sorted_impact
  ) +
  labs(y = NULL, x = NULL, subtitle = "How significant is the environmental impact of producing...")

q4_wide <- valid_data |>
  select(starts_with("q4")) |>
  rename_with(~ gsub("q4_opinion_", "", .x))

q4 <- q4_wide |>
  mutate(id = row_number()) |>
  pivot_longer(-id) |>
  mutate(name = fct_relevel(name, sorted_fibers))

q4 |>
  ggplot() +
  aes(y = value, x = name, fill = name, color = name) +
  geom_violin(alpha = 0.2, adjust = 2) +
  geom_jitter(alpha = 0.4, width = 0.2, height = 0.2) +
  scale_fill_carto_d(guide = "none") +
  scale_color_carto_d(guide = "none") +
  scale_y_continuous(
    breaks = 1:7,
    labels = sorted_like
  ) +
  labs(y = NULL, x = NULL, subtitle = "What is your opinion of these fibers?")

Some observations:

  • Cashmere, Wool, Cotton and Linen have similar shape in the “likedness” distribution, but Cashmere and Wool are most liked
  • Viscose received somewhat neutral responses
  • Polyester and Acrylic have similar “dislikedness”, but the response to Polyester are more spread out

Question: Are these rankings associated with each other? e.g. does someone who likes cashmere also like cotton and linen?

We look at a few correlation tests:

ggpairs(q4_wide)

There is statistically significant positive correlation in the rankings between:

  • cashmere and linen (P < 0.001)
  • cashmere and wool (P < 0.001)
  • cashmere and cotton (P < 0.01)
  • cotton and wool (P < 0.01)
  • acrylic and polyester (P < 0.001)

Hmm… maybe this is not the most interesting result…

Respondents’ knowledge of the fiber sources

q5 <- valid_data |>
  select(starts_with("q5_knowledge_")) |>
  rename_with(~ gsub("q5_knowledge_", "", .x)) |>
  mutate(id = row_number()) |>
  pivot_longer(-id) |>
  mutate(
    value = fct_relevel(
      value,
      c("I do not know", "Produced synthetically", "Plant-based", "Animal-based")
    ),
    name = fct_relevel(name, sorted_fibers)
  )

q5 |>
  count(name, value) |>
  mutate(color = if_else(n > 60, "white", "black")) |>
  ggplot() +
  aes(x = name, y = value) +
  geom_tile(aes(fill = n), color = "white") +
  geom_text(aes(label = n, color = color)) +
  scale_color_identity() +
  scale_fill_carto_c(palette = "Teal") +
  labs(x = NULL, y = NULL) +
  theme_minimal() +
  theme(panel.grid.major = element_blank())

Interests in fashion

q6_columns <- c(
  "I like fashion.",
  "I often buy clothes in general.",
  "I read fashion news regularly.",
  "I try to keep my wardrobe\nup-to-date with fashion trends.",
  "I like to shop for clothes."
)
q6 <- valid_data |>
  select(starts_with("q6"), Country) |>
  left_join(continent_data, by = "Country") |>
  select(-Country) |>
  `colnames<-`(c(q6_columns, "continent")) |>
  mutate(id = row_number()) |>
  pivot_longer(-c(id, continent)) |>
  drop_na(continent)

q6 |>
  ggplot() +
  aes(x = value, y = continent, fill = continent, color = continent) +
  facet_wrap(~name, ncol = 5) +
  geom_violin(alpha = 0.2, adjust = 1.5) +
  geom_jitter(alpha = 0.4, width = 0.1, height = 0.1) +
  scale_fill_carto_d(guide = "none", palette = "Safe") +
  scale_color_carto_d(guide = "none", palette = "Safe") +
  labs(y = NULL, x = NULL)

How do these factors affect purchase intention

Purchase intention ~ perceived sustainability + attitude/opinion + knowledge + avg_involvement + avg_awareness

lm_results <- vector("list")
for (type in sorted_fibers) {
  # type can be "Cashmere", "Polyester", etc.
  dat <- valid_data |>
    select(ends_with(tolower(type)), starts_with("avg_"), -contains("knowledge")) |>
    rename_all(~ gsub(type, "", .x, ignore.case = TRUE))

  fit <- lm(q2_sweater_ ~ ., dat)
  lm_results[[type]] <- summary(fit)$coefficients[, "Pr(>|t|)"] |>
    c(glance(fit) |>
      select(df, df.residual, r.squared, adj.r.squared, f.value = statistic, p.value))
  # enframe() |> unnest(value) |> column_to_rownames("name") |>
  # `colnames<-`(type)
}

regression_table <- bind_rows(lm_results) |>
  mutate(fiber_type = names(lm_results), .before = 1)

regression_table |>
  rowwise() |>
  mutate(
    across(c(2:7, "p.value"), \(x) if (x < 0.001) "<0.001" else as.character(round(x, 3))),
    across(r.squared:f.value, \(x) round(x, 3))
  ) |>
  # datatable(
  #   options = list(scrollX = 700, dom = "t"),
  #   rownames = FALSE
  # ) |>
  # formatSignif(columns = 10:12, digits = 3) |>
  # formatStyle(
  #   names(regression_table),
  #   backgroundColor = styleInterval(c(0.05), c("#f3e79b", NA)),
  # ) |>
  # formatStyle(
  #   names(regression_table),
  #   backgroundColor = styleEqual("<0.001", "#f3e79b"),
  # ) |>
  write_csv("data/regression_table.csv")

Do demographic variables affect fiber awareness and fashion involvement?

score_df <- valid_data |>
  select(-starts_with("q")) |>
  left_join(continent_data, by = "Country") |>
  select(-Country)

involve_fit <- summary(lm(
  avg_involvement ~ continent + Age + Gender + `Financial stability`,
  data = score_df
))
involve_fit
## 
## Call:
## lm(formula = avg_involvement ~ continent + Age + Gender + `Financial stability`, 
##     data = score_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2043 -0.8184  0.0337  0.9126  2.5428 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              5.702597   0.488831  11.666  < 2e-16 ***
## continentEurope          0.073462   0.230179   0.319  0.75015    
## continentAmerica        -0.242223   0.504574  -0.480  0.63203    
## Age                     -0.038874   0.013816  -2.814  0.00569 ** 
## GenderMale              -1.410249   0.259602  -5.432 2.79e-07 ***
## GenderPrefer not to say -1.548130   0.647124  -2.392  0.01823 *  
## `Financial stability`   -0.007761   0.060689  -0.128  0.89844    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.265 on 125 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.275,  Adjusted R-squared:  0.2402 
## F-statistic: 7.901 on 6 and 125 DF,  p-value: 3.148e-07

Holding all other variables constant, for each year older, a person’s average score of involvement in fashion is estimated to decrease by approximately 0.04082 (P value = 0.00384). Also, on average, male respondents are estimated to have 1.46 lower involvement score compared to female respondents (P value < 0.001). Geography and financial stability do not have a significant association with a person’s average involvement in fashion.

aware_fit <- summary(lm(
  avg_awareness ~ continent + Age + Gender + `Financial stability`,
  data = score_df
))
aware_fit
## 
## Call:
## lm(formula = avg_awareness ~ continent + Age + Gender + `Financial stability`, 
##     data = score_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0741 -1.1603  0.3172  1.1457  2.5940 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              3.73533    0.61167   6.107 1.19e-08 ***
## continentEurope          0.22479    0.28802   0.780   0.4366    
## continentAmerica         0.42646    0.63137   0.675   0.5006    
## Age                      0.04308    0.01729   2.492   0.0140 *  
## GenderMale              -0.70151    0.32484  -2.160   0.0327 *  
## GenderPrefer not to say -0.11411    0.80974  -0.141   0.8882    
## `Financial stability`    0.00547    0.07594   0.072   0.9427    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.583 on 125 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.07772,    Adjusted R-squared:  0.03345 
## F-statistic: 1.756 on 6 and 125 DF,  p-value: 0.1136

Age has a significant positive association with fiber awareness preference before purchase. A person’s average score for fiber awareness is estimated to increase by approximately 0.045 for each year increase in age (P = 0.01402). Other variables such as gender, geography, and financial stability do not have a significant association with fiber awareness.

score_df |>
  filter(Gender %in% c("Male", "Female")) |>
  ggplot() +
  aes(x = Age, y = avg_involvement) +
  geom_smooth(
    aes(color = Gender),
    fill = "grey80",
    method = "lm",
    formula = "y~x"
  ) +
  geom_jitter(height = 0.05, width = 0.05, alpha = 0.6) +
  facet_wrap(~Gender) +
  scale_color_carto_d(palette = "ArmyRose", direction = -1, guide = "none") +
  labs(
    y = "Average involvement score",
    title = "Fashion involvement",
    subtitle = "\"Do you like fashion?\""
  )

score_df |>
  filter(Gender %in% c("Male", "Female")) |>
  ggplot() +
  aes(x = Age, y = avg_awareness) +
  geom_smooth(
    aes(color = Gender),
    fill = "grey80",
    method = "lm",
    formula = "y~x"
  ) +
  geom_jitter(height = 0.05, width = 0.05, alpha = 0.6) +
  facet_wrap(~Gender) +
  scale_color_carto_d(palette = "ArmyRose", direction = -1, guide = "none") +
  labs(
    y = "Average awareness score",
    title = "Fiber awareness preference before purchase",
    subtitle = "\"Before making a purchase, I like to know the fiber type of the clothing item.\""
  )